# Replication File for
# "Coalition Inclusion Probabilities: A Party-Strategic
# Measure for Predicting Policy and Politics
# Authors: Mark A. Kayser, Matthias Orlowski
# and Jochen Rehmert
# Code to replicate Figures F1, F2 and F3 (Appendix)
# and Table F1, F2 and F3

# set your directory to the folder with
# "estimation_cee_rile.rds"
# "estimation_cee_pf.rds"
# "data_Table2.RDS" 
# "des_data.RDS"
# "valid_data.RDS"
# "fun_predCL.R"
# "fun_getPrIngov.R"
setwd('...')


library(stargazer);library(mclogit);library(flux);library(ROCR)
library(PRROC);library(survival)
# load necessary self-written functions
source("fun_predCL.R")
source("fun_getPrIngov.R")

# ------------------------------------------ #
# ----- Data preparation for Table F3  ----- #
# ------------------------------------------ #
# load estimation data
# load data
dat.rile <- readRDS("estimation_cee_rile.rds")
dat.pf <- readRDS("estimation_cee_pf.rds")

dat.rile$cee <- 0
dat.rile$cee[dat.rile$country %in% c("Bulgaria","Croatia","Czech Republic",
                                     "Estonia","Hungary","Slovenia","Slovakia",
                                     "Poland","Lithuania","Latvia","Romania")] <- 1
dat.pf$cee <- 0
dat.pf$cee[dat.pf$country %in% c("Bulgaria","Croatia","Czech Republic",
                                 "Estonia","Hungary","Slovenia","Slovakia",
                                 "Poland","Lithuania","Latvia","Romania")] <- 1

dat.rile$case_id_num <- as.numeric(as.factor(dat.rile$case_id))
dat.pf$case_id_num <- as.numeric(as.factor(dat.pf$case_id))

# use first coefficients as starting values
fin.oecd.pf <- mclogit(cbind(realg,case_id_num ) ~  dompar + np1 + domp1 + adomp1 +  naminor + namginv + naminwin + nanumpar + namedian + 
                         gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.pf[dat.pf$cee ==0 ,], 
                       model = TRUE, x = T, y = TRUE ,
                       start= c(1.156, -0.938, 2.879, .576,
                                -1.798, -0.426, 0.869,
                                -0.636, 0.653, -0.641, 2.692,
                                0.077, -1.828, 0.728, 0.998)  )
pf.case <- unique(fin.oecd.pf$model$`cbind(realg, case_id_num)`[,2])
df.oecd.pf <- dat.pf[dat.pf$case_id_num %in% pf.case,]
pf.cases <- unique(dat.pf$case_id[dat.pf$case_id_num %in% pf.case])
fin.oecd.rile <- mclogit(cbind(realg,case_id_num ) ~  dompar + np1 + domp1 + adomp1 +  naminor + namginv + naminwin + nanumpar + namedian + 
                           gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.rile[dat.rile$cee ==0 & dat.rile$case_id %in% pf.cases ,], model = TRUE, x = T, y = TRUE, 
                         start=c(1.156, -0.938, 2.879, .576,
                                 -1.798, -0.426, 0.869,
                                 -0.636, 0.653, -0.641, 2.692,
                                 0.077, -1.828, 0.728, 0.998) )
rile.case <- unique(fin.oecd.rile$model$`cbind(realg, case_id_num)`[,2])
df.oecd.rile <- dat.rile[dat.rile$case_id_num %in% rile.case,]
rile.cases <- unique(dat.rile$case_id[dat.rile$case_id_num %in% rile.case])

 
fin.cee.rile <- mclogit(cbind(realg,case_id_num ) ~ dompar +  naminor +  naminwin + nanumpar + namedian + 
                          gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.rile[dat.rile$cee ==1 & dat.rile$case_id != "Poland_Suchocka I" ,], model = TRUE, x = T, y = TRUE, 
                        start=c(1.444,0.402,1.714,
                                -0.195,0.695,-0.428,
                                2.419, 1.524, -0.092,
                                0.096, 0.303) )
rile.case <- unique(fin.cee.rile$model$`cbind(realg, case_id_num)`[,2])
df.cee.rile <- dat.rile[dat.rile$case_id_num %in% rile.case,]

fin.cee.pf <- mclogit(cbind(realg,case_id_num ) ~ dompar +  naminor +  naminwin + nanumpar + namedian + 
                        gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.pf[dat.pf$cee ==1 & dat.pf$case_id != "Poland_Suchocka I" ,], model = TRUE, x = T, y = TRUE, 
                      start=c(1.444,0.402,1.714,
                              -0.195,0.695,-0.428,
                              2.419, 1.524, -0.092,
                              0.096, 0.303) )
pf.case <- unique(fin.cee.pf$model$`cbind(realg, case_id_num)`[,2])
df.cee.pf <- dat.pf[dat.pf$case_id_num %in% pf.case,]

 
stargazer(fin.oecd.pf, fin.oecd.rile , fin.cee.pf, fin.cee.rile,
          add.lines = list(c("Pot. Cabinets",nrow(fin.oecd.pf$model) ,
                             nrow(fin.oecd.rile$model),length(fin.cee.pf$y) ,
                             length(fin.cee.rile$y)),
                           c("Form. Opp.", length(unique(fin.oecd.pf$model$`cbind(realg, case_id_num)`[,2])),
                             length(unique(fin.oecd.rile$model$`cbind(realg, case_id_num)`[,2])),
                             length(fin.cee.pf$xlevels$`strata(case_id)`), 
                             length(fin.cee.rile$xlevels$`strata(case_id)`))))

 
dat.sample <- merge(dat.pf, dat.rile[,c("cab_comp","case_id","gdiv1")], by = c("case_id","cab_comp"), all = TRUE)

colnames(dat.sample)[9] <- "gdiv1"
colnames(dat.sample)[length(colnames(dat.sample))] <- "logrile"

fin.oecd.same <- mclogit(cbind(realg,case_id_num ) ~  dompar + np1 + domp1 + adomp1 +  naminor + namginv + naminwin + nanumpar + namedian + 
                           gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.sample[dat.sample$cee ==0 & !is.na(dat.sample$logrile) , ], model = TRUE, x = T, y = TRUE, 
                         start=c(1.156, -0.938, 2.879, .576,
                                 -1.798, -0.426, 0.869,
                                 -0.636, 0.653, -0.641, 2.692,
                                 0.077, -1.828, 0.728, 0.998) )
same.case <- unique(fin.oecd.same$model$`cbind(realg, case_id_num)`[,2])
df.oecd.same <- dat.sample[dat.sample$case_id_num %in% same.case,]

fin.cee.same <- mclogit(cbind(realg,case_id_num ) ~ dompar +  naminor +  naminwin + nanumpar + namedian + 
                          gdiv1 + sq + cab_hist + anti +  nasecond + nathird , data=dat.sample[dat.sample$cee ==1 & !is.na(dat.sample$logrile) & dat.sample$case_id != "Poland_Suchocka I" ,], model = TRUE, x = T, y = TRUE, 
                        start=c(1.444,0.402,1.714,
                                -0.195,0.695,-0.428,
                                2.419, 1.524, -0.092,
                                0.096, 0.303) )
same.case <- unique(fin.cee.same$model$`cbind(realg, case_id_num)`[,2])
df.cee.same <- dat.sample[dat.sample$case_id_num %in% same.case,]


# --------------------- #
# ----- Table F3  ----- #
# --------------------- #
getPr <- function(dat, pred.var = var){
  
  mps <- unlist(strsplit(pred.var, "_"))[2]
  pr_vals <- dat[[pred.var]]
  
  # maximal predicted probability
  dat[[paste("maxpr", mps, sep = "_")]] <- max(pr_vals)
  
  # Difference between highest and second highest predicted probability
  dat[[paste("pr2d", mps, sep = "_")]] <- -diff(sort(pr_vals, decreasing = TRUE))[1]
  
  return(dat)
  
}

df.oecd.pf$pred_coal <- predCL(fin.oecd.pf, new.dat = df.oecd.pf, strat.var = "case_id")
df.oecd.pf <- ddply(df.oecd.pf, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.pf.0 <- df.oecd.pf[['pred_coal']][df.oecd.pf$realg == 0 & !is.na(df.oecd.pf[['pred_coal']])]  
oecd.pf.1 <- df.oecd.pf[['pred_coal']][df.oecd.pf$realg == 1 & !is.na(df.oecd.pf[['pred_coal']])]  
pr.oecd.pf <- pr.curve(scores.class0 = oecd.pf.1, scores.class1 = oecd.pf.0, curve = FALSE)
aupr.oecd.pf <- round(pr.oecd.pf$auc.integral,3)

# -- AUC -- #
cabs = unique(df.oecd.pf$case_id)
count.oecd.pf = length(cabs)

oecd.pf.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(oecd.pf.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.oecd.pf$cab_comp[df.oecd.pf$case_id == ii & df.oecd.pf$realg == 1]))==0){next}
  true.gov <- as.character(df.oecd.pf$cab_comp[df.oecd.pf$case_id == ii & df.oecd.pf$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  oecd.pf <- getPrIngov(df.oecd.pf[df.oecd.pf$case_id == ii,], 
                        prob.var = "pred_coal", 
                        comp.var = "cab_comp", pty.sep = "; ")
  oecd.pf$case_id <- case_id
  oecd.pf$in_gov <- as.numeric(oecd.pf$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  oecd.pf.out <- rbind(oecd.pf.out, oecd.pf)
  cat(ii,"\n")
}

pred.oecd.pf <- prediction(oecd.pf.out[["pr_ingov"]], oecd.pf.out$in_gov) 
perf.oecd.pf <- performance(pred.oecd.pf, "tpr", "fpr")
auc.oecd.pf <- round(flux::auc(data.frame(perf.oecd.pf@x.values)[,1], data.frame(perf.oecd.pf@y.values)[,1]),3)

df.oecd.rile$pred_coal <- predCL(fin.oecd.rile, new.dat = df.oecd.rile, strat.var = "case_id")
df.oecd.rile <- ddply(df.oecd.rile, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.rile.0 <- df.oecd.rile[['pred_coal']][df.oecd.rile$realg == 0 & !is.na(df.oecd.rile[['pred_coal']])]  
oecd.rile.1 <- df.oecd.rile[['pred_coal']][df.oecd.rile$realg == 1 & !is.na(df.oecd.rile[['pred_coal']])]  
pr.oecd.rile <- pr.curve(scores.class0 = oecd.rile.1, scores.class1 = oecd.rile.0, curve = FALSE)
aupr.oecd.rile <- round(pr.oecd.rile$auc.integral,3)

# -- AUC -- #
cabs = unique(df.oecd.rile$case_id)
count.oecd.rile = length(cabs)

oecd.rile.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(oecd.rile.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.oecd.rile$cab_comp[df.oecd.rile$case_id == ii & df.oecd.rile$realg == 1]))==0){next}
  true.gov <- as.character(df.oecd.rile$cab_comp[df.oecd.rile$case_id == ii & df.oecd.rile$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  oecd.rile <- getPrIngov(df.oecd.rile[df.oecd.rile$case_id == ii,], 
                        prob.var = "pred_coal", 
                        comp.var = "cab_comp", pty.sep = "; ")
  oecd.rile$case_id <- case_id
  oecd.rile$in_gov <- as.numeric(oecd.rile$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  oecd.rile.out <- rbind(oecd.rile.out, oecd.rile)
  cat(ii,"\n")
}

pred.oecd.rile <- prediction(oecd.rile.out[["pr_ingov"]], oecd.rile.out$in_gov) 
perf.oecd.rile <- performance(pred.oecd.rile, "tpr", "fpr")
auc.oecd.rile <- round(flux::auc(data.frame(perf.oecd.rile@x.values)[,1], data.frame(perf.oecd.rile@y.values)[,1]),3)


df.oecd.same$pred_coal <- predCL(fin.oecd.same, new.dat = df.oecd.same, strat.var = "case_id")
df.oecd.same <- ddply(df.oecd.same, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.same.0 <- df.oecd.same[['pred_coal']][df.oecd.same$realg == 0 & !is.na(df.oecd.same[['pred_coal']])]  
oecd.same.1 <- df.oecd.same[['pred_coal']][df.oecd.same$realg == 1 & !is.na(df.oecd.same[['pred_coal']])]  
pr.oecd.same <- pr.curve(scores.class0 = oecd.same.1, scores.class1 = oecd.same.0, curve = FALSE)
aupr.oecd.same <- round(pr.oecd.same$auc.integral,3)

# -- AUC -- #
cabs = unique(df.oecd.same$case_id)
count.oecd.same = length(cabs)

oecd.same.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(oecd.same.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.oecd.same$cab_comp[df.oecd.same$case_id == ii & df.oecd.same$realg == 1]))==0){next}
  true.gov <- as.character(df.oecd.same$cab_comp[df.oecd.same$case_id == ii & df.oecd.same$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  oecd.same <- getPrIngov(df.oecd.same[df.oecd.same$case_id == ii,], 
                          prob.var = "pred_coal", 
                          comp.var = "cab_comp", pty.sep = "; ")
  oecd.same$case_id <- case_id
  oecd.same$in_gov <- as.numeric(oecd.same$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  oecd.same.out <- rbind(oecd.same.out, oecd.same)
  cat(ii,"\n")
}

pred.oecd.same <- prediction(oecd.same.out[["pr_ingov"]], oecd.same.out$in_gov) 
perf.oecd.same <- performance(pred.oecd.same, "tpr", "fpr")
auc.oecd.same <- round(flux::auc(data.frame(perf.oecd.same@x.values)[,1], data.frame(perf.oecd.same@y.values)[,1]),3)


df.cee.pf$pred_coal <- predCL(fin.cee.pf, new.dat = df.cee.pf, strat.var = "case_id")
df.cee.pf <- ddply(df.cee.pf, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.pf.0 <- df.cee.pf[['pred_coal']][df.cee.pf$realg == 0 & !is.na(df.cee.pf[['pred_coal']])]  
oecd.pf.1 <- df.cee.pf[['pred_coal']][df.cee.pf$realg == 1 & !is.na(df.cee.pf[['pred_coal']])]  
pr.cee.pf <- pr.curve(scores.class0 = oecd.pf.1, scores.class1 = oecd.pf.0, curve = FALSE)
aupr.cee.pf <- round(pr.cee.pf$auc.integral,3)

# -- AUC -- #
cabs = unique(df.cee.pf$case_id)
count.cee.pf = length(cabs)

cee.pf.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(cee.pf.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.cee.pf$cab_comp[df.cee.pf$case_id == ii & df.cee.pf$realg == 1]))==0){next}
  true.gov <- as.character(df.cee.pf$cab_comp[df.cee.pf$case_id == ii & df.cee.pf$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  cee.pf <- getPrIngov(df.cee.pf[df.cee.pf$case_id == ii,], 
                          prob.var = "pred_coal", 
                          comp.var = "cab_comp", pty.sep = "; ")
  cee.pf$case_id <- case_id
  cee.pf$in_gov <- as.numeric(cee.pf$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  cee.pf.out <- rbind(cee.pf.out, cee.pf)
  cat(ii,"\n")
}

pred.cee.pf <- prediction(cee.pf.out[["pr_ingov"]], cee.pf.out$in_gov) 
perf.cee.pf <- performance(pred.cee.pf, "tpr", "fpr")
auc.cee.pf <- round(flux::auc(data.frame(perf.cee.pf@x.values)[,1], data.frame(perf.cee.pf@y.values)[,1]),3)


df.cee.rile$pred_coal <- predCL(fin.cee.rile, new.dat = df.cee.rile, strat.var = "case_id")
df.cee.rile <- ddply(df.cee.rile, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.rile.0 <- df.cee.rile[['pred_coal']][df.cee.rile$realg == 0 & !is.na(df.cee.rile[['pred_coal']])]  
oecd.rile.1 <- df.cee.rile[['pred_coal']][df.cee.rile$realg == 1 & !is.na(df.cee.rile[['pred_coal']])]  
pr.cee.rile <- pr.curve(scores.class0 = oecd.rile.1, scores.class1 = oecd.rile.0, curve = FALSE)
aupr.cee.rile <- round(pr.cee.rile$auc.integral,3)

# -- AUC -- #
cabs = unique(df.cee.rile$case_id)
count.cee.rile = length(cabs)

cee.rile.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(cee.rile.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.cee.rile$cab_comp[df.cee.rile$case_id == ii & df.cee.rile$realg == 1]))==0){next}
  true.gov <- as.character(df.cee.rile$cab_comp[df.cee.rile$case_id == ii & df.cee.rile$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  cee.rile <- getPrIngov(df.cee.rile[df.cee.rile$case_id == ii,], 
                       prob.var = "pred_coal", 
                       comp.var = "cab_comp", pty.sep = "; ")
  cee.rile$case_id <- case_id
  cee.rile$in_gov <- as.numeric(cee.rile$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  cee.rile.out <- rbind(cee.rile.out, cee.rile)
  cat(ii,"\n")
}

pred.cee.rile <- prediction(cee.rile.out[["pr_ingov"]], cee.rile.out$in_gov) 
perf.cee.rile <- performance(pred.cee.rile, "tpr", "fpr")
auc.cee.rile <- round(flux::auc(data.frame(perf.cee.rile@x.values)[,1], data.frame(perf.cee.rile@y.values)[,1]),3)


df.cee.same$pred_coal <- predCL(fin.cee.same, new.dat = df.cee.same, strat.var = "case_id")
df.cee.same <- ddply(df.cee.same, .(case_id), getPr, pred.var =  "pred_coal") 
oecd.same.0 <- df.cee.same[['pred_coal']][df.cee.same$realg == 0 & !is.na(df.cee.same[['pred_coal']])]  
oecd.same.1 <- df.cee.same[['pred_coal']][df.cee.same$realg == 1 & !is.na(df.cee.same[['pred_coal']])]  
pr.cee.same <- pr.curve(scores.class0 = oecd.same.1, scores.class1 = oecd.same.0, curve = FALSE)
aupr.cee.same <- round(pr.cee.same$auc.integral,3)

# -- AUC -- #
cabs = unique(df.cee.same$case_id)
count.cee.same = length(cabs)

cee.same.out = data.frame(matrix(nrow = 0, ncol = 5))
colnames(cee.same.out) <- c("party_abbr", "pr_ingov",  "cabinet_id",  "in_gov")

for(ii in cabs){
  if(length(as.character(df.cee.same$cab_comp[df.cee.same$case_id == ii & df.cee.same$realg == 1]))==0){next}
  true.gov <- as.character(df.cee.same$cab_comp[df.cee.same$case_id == ii & df.cee.same$realg == 1])
  case_id <- ii
  # for pty.sep include one tab white space after separater
  cee.same <- getPrIngov(df.cee.same[df.cee.same$case_id == ii,], 
                       prob.var = "pred_coal", 
                       comp.var = "cab_comp", pty.sep = "; ")
  cee.same$case_id <- case_id
  cee.same$in_gov <- as.numeric(cee.same$party_abbr %in% unlist(strsplit(true.gov, "; ")))
  cee.same.out <- rbind(cee.same.out, cee.same)
  cat(ii,"\n")
}

pred.cee.same <- prediction(cee.same.out[["pr_ingov"]], cee.same.out$in_gov) 
perf.cee.same <- performance(pred.cee.same, "tpr", "fpr")
auc.cee.same <- round(flux::auc(data.frame(perf.cee.same@x.values)[,1], data.frame(perf.cee.same@y.values)[,1]),3)


stargazer(fin.oecd.pf, fin.oecd.rile , fin.oecd.same, fin.cee.pf, fin.cee.rile, fin.cee.same,
          add.lines = list(c("Pot. Cabinets",nrow(fin.oecd.pf$model) ,
                             nrow(fin.oecd.rile$model),
                             nrow(fin.oecd.same$model),
                             nrow(fin.cee.pf$model) ,
                             nrow(fin.cee.rile$model),
                             nrow(fin.cee.same$model) ),
                           c("Form. Opp.", length(unique(fin.oecd.pf$model$`cbind(realg, case_id_num)`[,2])),
                             length(unique(fin.oecd.rile$model$`cbind(realg, case_id_num)`[,2])),
                             length(unique(fin.oecd.same$model$`cbind(realg, case_id_num)`[,2])),
                             length(unique(fin.cee.pf$model$`cbind(realg, case_id_num)`[,2])), 
                             length(unique(fin.cee.rile$model$`cbind(realg, case_id_num)`[,2])),
                             length(unique(fin.cee.same$model$`cbind(realg, case_id_num)`[,2]))),
                           c("Coalition Level AUPR",aupr.oecd.pf,aupr.oecd.rile,aupr.oecd.same,
                             aupr.cee.pf,aupr.cee.rile,aupr.cee.same),
                           c("Party Level AUC",auc.oecd.pf,auc.oecd.rile,auc.oecd.same,
                             auc.cee.pf,auc.cee.rile,auc.cee.same),
                           c("Log Likelihood",round(fin.oecd.pf$ll,3),
                              round(fin.oecd.rile$ll,3), round(fin.oecd.same$ll,3),
                             round(fin.cee.pf$ll,3), round(fin.cee.rile$ll,3), round(fin.cee.same$ll,3))),
            out = "tableF3.tex")



# ------------------------------------------ #
# ----- Data preparation for Table F2  ----- #
# ------------------------------------------ #

# read data
DT <- readRDS('data_Table2.RDS')

# ---------- data partition ----------#
# Test and training set
set.seed(1104)
# partition dataset while being oblivious to majority situations
trs <- sample(unique(DT$cabinet_id), floor(.8*length(unique(DT$cabinet_id))))
DT$set <- "test"
DT$set[which(DT$cabinet_id %in% trs)] <- "training"

# Generate Interactions
crInteractions <- function(dat){
  
  # single party majority predictor
  dat[["adomp1"]] = (dat[["leg_class"]] == "A")*(dat[["numpar"]] == 1)*(dat[["dompar"]])
  dat[["domp1"]] = (dat[["leg_class"]] != "A")*(dat[["numpar"]] == 1)*(dat[["dompar"]])
  dat[["np1"]] <- as.numeric(dat[["numpar"]] == 1)
  
  dat[["nanumpar"]] = (dat[["leg_class"]] != "A")*(dat[["numpar"]])
  
  dat[["naminwin"]] = (dat[["leg_class"]] != "A")*(dat[["minwin"]])
  dat[["naminor"]] = (dat[["leg_class"]] != "A")*(dat[["minor"]])
  dat[["namginv"]] = (dat[["leg_class"]] != "A")*(dat[["minor"]])*(dat[["inv_rule"]])
  dat[["namedian"]] = (dat[["leg_class"]] != "A")*(dat[["median"]])
  dat[["nasecond"]] = (dat[["leg_class"]] != "A")*(dat[["second"]])
  dat[["nathird"]] = (dat[["leg_class"]] != "A")*(dat[["third"]])
  
  
  dat[["mgodiv1"]] = dat[["odiv1"]]*dat[["minor"]]
  dat[["mgodiff"]] = dat[["odiff"]]*dat[["minor"]]
  dat[["mginvest"]] = dat[["inv_rule"]]*dat[["minor"]]
  
  dat[["confl_sq"]] = dat[["conflict"]]*dat[["sq"]]
  dat[["confl_pm"]] = dat[["conflict"]]*dat[["prevpm"]]	
  dat[["cont_sq"]] = dat[["cont_rule"]]*dat[["sq"]]
  dat[["pe_sq"]] = dat[["post_elc"]]*dat[["sq"]]	
  dat[["chng_pe"]] = dat[["avg_chng"]]*dat[["post_elc"]]	
  dat[["chng_sq"]] = dat[["avg_chng"]]*dat[["sq"]]	
  dat[["chng_pe_sq"]] = dat[["avg_chng"]]*dat[["post_elc"]]*dat[["sq"]]
  
  
  return(dat)	
  
}

DT <- crInteractions(DT)

# ---------- declare test and training data ---------#

# TD is test data without majority situations (i.e., leg_class A)
TD <- DT[which(DT$set == "test" & DT$leg_class != "A"), ]
TD <- na.omit(TD)

# test data containing "majority situations"
TD.maj <- DT[which(DT$set == "test"),]
TD.maj <- na.omit(TD.maj)

# training data set pruned to constant sample for all models
DT2 <- na.omit(DT[which(DT$set == "training"),])

#--- formula ---#
f_nmod <- as.formula(realg ~ dompar + np1 + adomp1 + domp1
                     + naminor + namginv 
                     + naminwin + nanumpar + namedian 
                     + diff 
                     + sq 
                     + anti
                     + nasecond + nathird 
                     + cab_hist 
                     + strata(cabinet_id))

#--- incl. majority situations ---#
m_nmod <- clogit(f_nmod, data = DT2[which(DT2$set == "training" ), ]) 

#--- formula ---#
f_nmod.rile <- as.formula(realg ~ dompar + np1 + adomp1 + domp1
                     + naminor + namginv 
                     + naminwin + nanumpar + namedian 
                     + gdiv1 
                     + sq 
                     + anti
                     + nasecond + nathird 
                     + cab_hist 
                     + strata(cabinet_id))

#--- incl. majority situations ---#
m_nmod.rile <- clogit(f_nmod.rile, data = DT2[which(DT2$set == "training" ), ]) 

# --------------------- #
# ----- Table F2  ----- #
# --------------------- #
stargazer(m_nmod, m_nmod.rile,
          covariate.labels = c(
            "Largest Party in Coalition",
            "Single Party Gov't",
            "Largest Party $\\times$ Single Party Gov't",
            "No-Majority Situation $\\times$ Largest Party $\\times$ Single Party Gov't",
            "No-Majority $\\times$ Minority Government",
            "No-Majority $\\times$ $\\times$ Minority Gov't $\\times$ Investiture Vote",
            "No-Majority $\\times$ Minimal Winning Coalition",
            "No-Majority $\\times$ Number of Parties in Coalition",
            "No-Majority $\\times$ Median Party in Coalition",
            "Ideological Range in Coalition (Party Family)",
            "Ideological Range in Coalition (RiLe)",
            "Status Quo",
            "Anti-Establishment Party in Coalition",
            "No-Majority $\\times$ Second Largest Party",
            "No-Majority $\\times$ Third Largest Party",
            "Cabinet History"
          ), 
          add.lines = list(c("Formation Opportunities",m_nmod$nevent, m_nmod.rile$nevent)),
          out = "tableF2.tex")

#--------------------------------------#

# load data
dat <- readRDS("des_data.RDS")

# --------------------- #
# ----- Figure F3 ----- #
# --------------------- #

# remove caretaker cabinets
dat <- dat[dat$cab_caretaker == 0, ]

library(ggplot2);library(grid)
 
cabinet <- dat[dat$cabinet == "Schmidt III",]

pdf("figureF3.pdf")
 par(mfrow = c(2,1),
    oma = c(4,5,0,0)+0.1,
    mar = c(0,0,1,1) + 0.1,
    mgp = c(2,1,0))
plot(cabinet$cip_gross[cabinet$party_abbr == "CDU-CSU"] ~ as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31")
     , ylim = c(0,1), ylab = "CIP", xaxt='n', col = "white", xlab = "")
lines(cabinet$cip_gross[cabinet$party_abbr == "CDU-CSU"] ~ as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), lty = 1, lwd = 2)
segments(as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "CDU-CSU"]), as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "CDU-CSU"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "CDU-CSU"]), as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"] , origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "CDU-CSU"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "CDU-CSU"]), as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"] , origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "CDU-CSU"]))

lines(cabinet$cip_gross[cabinet$party_abbr == "SPD"] ~ as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), lty = 5, lwd = 2)
segments(as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "SPD"]), as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "SPD"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "SPD"]), as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "SPD"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "SPD"]), as.Date(cabinet$date[cabinet$party_abbr == "SPD"] , origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "SPD"]))
lines(cabinet$cip_gross[cabinet$party_abbr == "FDP"] ~ as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), lty = 2, lwd = 2)
segments(as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "FDP"]), as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "FDP"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "FDP"]), as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "FDP"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "FDP"]), as.Date(cabinet$date[cabinet$party_abbr == "FDP"] , origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "FDP"]))

lines(cabinet$cip_gross[cabinet$party_abbr == "Greens"] ~ as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), lty = 3, lwd = 2)
segments(as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "Greens"]), as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "Greens"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "Greens"]), as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_lower[cabinet$party_abbr == "Greens"]))
segments(as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "Greens"]), as.Date(cabinet$date[cabinet$party_abbr == "Greens"] , origin = "1899-12-31"), as.numeric(cabinet$cip_gross_upper[cabinet$party_abbr == "Greens"]))

text(x = as.Date(30750, origin = "1899-12-31" ), y = .6, label = "FDP")
text(x = as.Date(29700, origin = "1899-12-31" ), y = .9, label = "CDU/CSU")
text(x = as.Date(29700, origin = "1899-12-31" ), y = .08, label = "B90/Gr�ne")
text(x = as.Date(29950, origin = "1899-12-31" ), y = .35, label = "SPD")
 mtext("CIP", side = 2, line = 2, cex = 1.5)

# 17.09.1982, FDP withdraws from coalition
plot(cabinet$polls_lagged1[cabinet$party_abbr == "CDU-CSU"] ~ as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31")
     , ylim = c(0,55), ylab = "Vote Intention (Polls)",   col = "white", xlab = "")
lines(cabinet$polls_lagged1[cabinet$party_abbr == "CDU-CSU"] ~ as.Date(cabinet$date[cabinet$party_abbr == "CDU-CSU"], origin = "1899-12-31"), lty = 1, lwd = 2)
lines(cabinet$polls_lagged1[cabinet$party_abbr == "SPD"] ~ as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), lty = 5, lwd = 2)
lines(cabinet$polls_lagged1[cabinet$party_abbr == "FDP"] ~ as.Date(cabinet$date[cabinet$party_abbr == "FDP"], origin = "1899-12-31"), lty = 2, lwd = 2)
lines(cabinet$polls_lagged1[cabinet$party_abbr == "Greens"] ~ as.Date(cabinet$date[cabinet$party_abbr == "Greens"], origin = "1899-12-31"), lty = 3, lwd = 2)
lines(cabinet$polls_lagged1[cabinet$party_abbr == "PDS-Linke"] ~ as.Date(cabinet$date[cabinet$party_abbr == "PDS-Linke"], origin = "1899-12-31"), lty = 4, lwd = 2)
lines(rep(5, nrow(cabinet[cabinet$party_abbr == "SPD",])) ~ as.Date(cabinet$date[cabinet$party_abbr == "SPD"], origin = "1899-12-31"), col = "red", lty = 2)
 
text(x = as.Date(29700, origin = "1899-12-31" ), y = 14, label = "FDP")
text(x = as.Date(29700, origin = "1899-12-31" ), y = 50, label = "CDU/CSU")
text(x = as.Date(29700, origin = "1899-12-31" ), y = 31, label = "SPD")
text(x = as.Date(30150, origin = "1899-12-31" ), y = 13, label = "B90/Gr�ne")
mtext("Vote Intention (Polls)", side = 2, line = 2, cex = 1.5)
dev.off()

#-----------------------------------------------------------------------------

# --------------------------- #
# ----- Figures F1 & F2 ----- #
# --------------------------- #

require(ggplot2)
require(ggridges)
dat.germany <- dat[dat$country == "germany",]
dat.germany$party_abbr[dat.germany$party_abbr == "CDU-CSU"] <- "CDU/\nCSU"
dat.germany$party_abbr[dat.germany$party_abbr == "PDS-Linke"] <- "PDS-\nLinke"
dat.germany$party_abbr[dat.germany$party_abbr == "Greens"] <- "B90/\nGru"

dat.germany$party_abbr <- factor(dat.germany$party_abbr, 
                                 levels = c("AfD","PDS-\nLinke","B90/\nGru","FDP","SPD","CDU/\nCSU"))

# ----- Figure F2 panel B Germany ----- #
pdf("figureF2_Germany.pdf")
ggplot(dat.germany, aes(x = cip_gross, y = party_abbr)) + 
  geom_density_ridges(scale = 0.6) + ylab("") + xlab("CIP") + xlim(0,1) +
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks.y  = element_blank(),
        axis.text = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 14, face = "bold"))
dev.off()

dat.sweden <- dat[dat$country == "sweden",]
dat.sweden$party_abbr <- factor(dat.sweden$party_abbr, 
                                levels = c("V", "NyD" ,"SD","KD","MP","C","FP","MSP","SAP"))

# ----- Figure F2 panel A Sweden ----- #
pdf("figureF2_Sweden.pdf")
ggplot(dat.sweden, aes(x = cip_gross, y = party_abbr)) + 
  geom_density_ridges(scale = 0.6) + ylab("") + xlab("CIP") + xlim(0,1) + 
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks.y  = element_blank(),
        axis.text = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 14, face = "bold"))
 dev.off()



dat.spain <- dat[dat$country == "spain",]
dat.spain <- dat.spain[dat.spain$party_abbr != "BNG",]
dat.spain$party_abbr[dat.spain$party_abbr == "AP-P"] <- "PP"

dat.spain$party_abbr <- factor(dat.spain$party_abbr, 
                               levels = c("UPyD","EHB","ERC","PNV","PCE|IU","P", "CiU" ,"CC","CDS","CDC","C-PC","UCD","PSOE","PP"))

# ----- Figure F1 Spain ----- #
pdf("figureF1_Spain.pdf")
ggplot(dat.spain, aes(x = cip_gross, y = party_abbr)) + 
  geom_density_ridges(scale = 0.6) + ylab("") + xlab("CIP") + xlim(0,1) + 
  theme(panel.background = element_rect(fill = "white"),
        axis.ticks.y  = element_blank(),
        axis.text = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 14, face = "bold"))
dev.off()
# ----- Table F1 ----- #

# load necessary packages
library(stargazer)
# load data
dat <- readRDS("valid_data.RDS")

summary(m.2 <- glm(in_gov ~ pr_ingov_pf, data = dat, family = binomial("logit")))

# ----- Table F1 ----- #
stargazer(m.2, 
          covariate.labels = c("CIP"),
          out = "tableF1.tex")
